\documentclass[12pt,reqno]{article}

\usepackage[usenames]{color}
\usepackage{amssymb}
\usepackage{graphicx}
\usepackage{amscd}

\usepackage[colorlinks=true,
linkcolor=webgreen,
filecolor=webbrown,
citecolor=webgreen]{hyperref}

\definecolor{webgreen}{rgb}{0,.5,0}
\definecolor{webbrown}{rgb}{.6,0,0}

\usepackage{color}
\usepackage{fullpage}
\usepackage{float}

\usepackage{psfig}
\usepackage{graphics,amsmath,amssymb}
\usepackage{amsthm}
\usepackage{amsfonts}
\usepackage{latexsym}
\usepackage{epsf}

\setlength{\textwidth}{6.5in}
\setlength{\oddsidemargin}{.1in}
\setlength{\evensidemargin}{.1in}
\setlength{\topmargin}{-.5in}
\setlength{\textheight}{8.9in}

\newcommand{\twolinesum}[2]{\sum_{\substack{{\scriptstyle #1}\\
{\scriptstyle #2}}}}

\def\cp {\mathcal{P}}
\def\cm {{\cal M}}
\def\ci {{\cal I}}
\newcommand{\gmod}[1]{\, ({\rm{mod}} \, #1)}

\newcommand{\seqnum}[1]{\href{http://oeis.org/#1}{\underline{#1}}}

\begin{document}

\begin{center}
\epsfxsize=4in
\leavevmode\epsffile{logo129.eps}
\end{center}

\theoremstyle{plain}
\newtheorem{theorem}{Theorem}
\newtheorem{corollary}[theorem]{Corollary}
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{proposition}[theorem]{Proposition}

\theoremstyle{definition}
\newtheorem{definition}[theorem]{Definition}
\newtheorem{example}[theorem]{Example}
\newtheorem{conjecture}[theorem]{Conjecture}

\theoremstyle{remark}
\newtheorem{remark}[theorem]{Remark}


\begin{center}
\vskip 1cm{\LARGE\bf Counting Primes whose Sum of \\
\vskip .1in
Digits is Prime}
\vskip 1cm
\large
Glyn Harman\\
Department of Mathematics\\
Royal Holloway, University of London\\
Egham\\
Surrey TW20 0EX\\
United Kingdom\\
\href{mailto:g.harman@rhul.ac.uk}{\tt g.harman@rhul.ac.uk} \\
\end{center}

\vskip .2 in

\begin{abstract}
Motivated by recent work of Drmota, Mauduit and Rivat, we discuss the
possibility of counting the number of primes up to $x$ whose sum of
digits is also prime.  We show that, although this is not possible
unless we assume a hypothesis on the distribution of primes stronger
than what is implied by the Riemann hypothesis, we can establish a
Mertens-type result.  That is, we obtain a formula for the number of
such primes $p$ up to $x$ weighted with a factor $1/p$.  Indeed, we can
iterate the method and count primes with the sum of digits a prime
whose sum of digits is a prime, and so on.
\end{abstract}

\section{Introduction}
\label{intro}
Thanks to the revolutionary work of Mauduit and Rivat \cite{Mau}, in particular as developed in \cite{Drm}, our knowledge of the distribution of 
primes with constraints on their sum of digits has undergone a substantial expansion in recent years. The question of counting the number of primes
up to $x$, whose sum of digits in base $b$ is also prime, would have been considered far too difficult ten years ago
in view of the difficulty in restricting the sum of digits function when its variable is prime.  In this paper we discuss this question, showing
that it is still impossibly difficult at present, but this is now a result of our lack of knowledge 
concerning the distribution of primes in short intervals,
and no longer caused by the problems of restricting the sum of digits function.  However, we are able to prove a Mertens-type 
result on counting primes whose sum of digits is prime.
Throughout this paper the letters $p$ and $q$ are reserved for prime variables. In 1874 Mertens proved that
\[
\sum_{p<X} \frac{1}{p} = \log \log X + A + O((\log x)^{-1}),
\]
for a certain constant $A$ (see \cite[p.\ 56]{5}).  In this paper we will obtain analogues of this result
for primes whose sum of digits written in base $b$ ($b \ge 2$) are also prime.  For brevity we shall write this set as $\cp_b$.
If
\[
n = \sum_{k=0}^r a_k b^k, \ \ 0 \le a_k \le b-1, \ \ \text{then we put} \ \sigma_b(n) = \sum_{k=0}^r a_k
\]
for the sum of digits function in base $b$. In fact we shall be able to iterate our working 
and establish Mertens-type theorems for primes belonging to sets defined as follows:
\[
\cp_b^{(1)} = \cp_b, \qquad \cp_b^{(j)} = \{p \in \cp_b: \sigma_b(p) \in \cp_b^{(j-1)}\}, \ j=2, 3 \ldots.
\]
We write $\log_bx$ for the logarithm to base $b$, $\log x$ for the natural logarithm, and define $L_j(x)$ inductively by
\[
L_1(x) = \max(1,\log x), \qquad L_j(x) = L_1(L_{j-1}(x)) \ \ \text{for} \ j \ge 2.
\]

Our starting point will be a remarkable theorem of Drmota, Mauduit and Rivat \cite{Drm}.
Before stating the result precisely
we need the following notation:
\[
s_b = {\frac{b^2-1}{12}}, \quad \mu_b = \tfrac12 (b-1), \quad S(k,x) = \left|\{p \le x: \sigma_b(p) = k  \}\right|.
\]
As usual we write $\phi(n)$ for Euler's totient function, and $\pi(x)$ for the number of primes up to $x$.  We recall the prime number theorem in
the form
\begin{equation}\label{PNT}
\pi(x) = \frac{x}{\log x} + O\left(\frac{x}{(\log x)^2} \right)\, .
\end{equation}

\begin{theorem}[Drmota/Mauduit/Rivat]\label{DMR}  We have, uniformly for all integers $k \ge 0$ with $(k,b-1) = 1$,
\begin{equation}\label{DMJ}
S(k,x) = \frac{b-1}{\phi(b-1)} \frac{\pi(x)}{\sqrt{2 \pi s_b \log_b x}}\left(\exp\left(- \frac{(k-\mu_b \log_b x)^2}{2 s_b \log_b x}  \right)
+ O\left((\log x)^{- \frac12 + \epsilon}  \right)  \right) \, ,
\end{equation}
where $\epsilon > 0$ is arbitrary but fixed.
\end{theorem}


Since $\sigma_b(p) \equiv p \gmod{b-1}$, for all but the finitely many primes $p$ satisfying $(p,b-1)>1$ we have $(\sigma_b(p),b-1)=1$, 
and this leads to the
$(b-1)/\phi(b-1)$ factor appearing frequently.  From the above result we can deduce some very interesting corollaries.
Clearly, \eqref{DMJ} shows that every sufficiently large integer is the sum of digits of a prime number. So, in particular, there are infinitely many
primes whose sum of digits is also prime.  Also, every sufficiently large prime is the sum of digits of another prime which in turn is
the sum of digits of another prime, and so on.  We use Theorem~\ref{DMR} to obtain a Mertens-type result as follows.

\begin{theorem}\label{T2}
Let $b \ge 2$ be an integer and $X \ge 2$.  Then  we have
\begin{equation}
\twolinesum{p < X}{p \in \cp_b} \frac{1}{p} = \frac{b-1}{\phi(b-1)} L_3(X) + O(1).
\end{equation}
\end{theorem}

In fact we shall use Theorem~\ref{T2} as the first step in an inductive argument to
establish the following more general result for the sets $\cp_b^{(j)}$.

\begin{theorem}\label{T3}
Let $b,j \ge 2$ be integers and $X \ge 2$.  Then  we have
\begin{equation}
\twolinesum{p < X}{p \in \cp_b^{(j)}} \frac{1}{p} = \left(\frac{b-1}{\phi(b-1)}\right)^{j} L_{j+2}(X) + O(1).
\end{equation}
\end{theorem}

As a final application of Theorem~\ref{DMR} we leave the following for the reader to prove.

\begin{theorem}\label{T4}
Write $\cp_b^*$ for the set of primes in $\cp_b$ which have a prime number of digits. Then
\[
\twolinesum{p < X}{p \in \cp_b^*} \frac{L_2(p)}{p} = \frac{b-1}{\phi(b-1)} L_3(X) + O(1).
\]
\end{theorem}

\section{The problem of the unweighted sum}
In this section we briefly consider the sum
\[
\pi_b^*(X) = \twolinesum{p < X}{p \in \cp_b} 1 \, .
\]
First we note the following result of K\'atai \cite{Kat}.
\begin{lemma}\label{Katlem}
For every positive integer $k$ we have
\[
\sum_{p < x} \left|\sigma_b(p) - \mu_b \log_b x\right|^k \ll_k x(\log x)^{k/2 - 1}.
\]
\end{lemma}
Now, even assuming the Riemann hypothesis and the pair correlation conjecture \cite{GHB}
we cannot prove that every interval $\ci_y = [y- y^{\frac12} (\log y)^{\frac13}, y+ y^{\frac12} (\log y)^{\frac13}]$ contains a 
prime number.  Suppose that $\ci_y$ contains no primes, and choose $X$ so that $y = \mu_b \log_b X$.  
Then Theorem~\ref{DMR} cannot supply an asymptotic formula for $\pi_b^*(X)$, and by Lemma~\ref{Katlem} with $k=3A$
\[
\pi_b^*(X) \ll_A \frac{X}{X L_1(X) (L_2(X))^A}
\]
for every positive integer $A$. This shows that no asymptotic formula is possible unless a stronger result than can be
obtained on the Riemann hypothesis is assumed, for clearly $\pi_b^*(X)$ should be of size $X(L_1(X) L_2(X))^{-1}$. We
state here a hypothetical result to illustrate what we mean.
\begin{theorem}\label{hyp}
Suppose that there exists a positive function $f(x) \rightarrow 0$ such that for all large $x$ every interval of the form
$[x, x+x^{\frac12} f(x)]$ contains $f(x) x^{\frac12}(1+o(1))(\log x)^{-1}$ primes.  Then
\begin{equation}\label{expect}
\pi_b^*(X) \sim \frac{b-1}{\phi(b-1)} \frac{X}{L_1(X) L_2(X)} .
\end{equation}
\end{theorem}
The proof of this result is straightforward. By Lemma~\ref{Katlem} we need only count primes whose sum of digits is $q$ with
\begin{equation}\label{con}
|q-\mu_b \log_b x| < (\log_b x)^{\frac23}.
\end{equation}
Applying Theorem~\ref{DMR} we need to estimate
\[
\sum_{\eqref{con}} \exp\left(- \frac{(q-\mu_b \log_b x)^2}{2 s_b \log_b x}  \right).
\]
Breaking the summation range into shorter ranges of the form $[y,y+y^{\frac12}f(y)]$ and then comparing a sum with an integral
we obtain
\[
\frac{1+o(1)}{\log(\mu_b \log_b x)} \int_{-\infty}^{\infty} \exp\left(- \frac{t^2}{2 s_b \log_b x}  \right)\, dt
=
\frac{1+o(1)}{\log(\mu_b \log_b x)}\sqrt{2s_b \pi \log_b x},
\]
which quickly establishes the result.

We remark that the hypothesis of Theorem~\ref{hyp}
is ``almost always'' true unconditionally in the following sense.
The method of Heath-Brown \cite{HB} shows that, given $\epsilon > 0$, there exists
$\eta>0$ such that for
all integers $y \in[Y,2Y)$, with at most $\ll Y^{\frac34 + \epsilon}$ exceptions, every subinterval
$[z,z+ z^{\frac12-\eta}]$ of $[y-Y^{\frac12 + \epsilon}, y+ Y^{\frac12+\epsilon}]$ 
contains $z^{\frac12-\eta}(1+O((\log z)^{-1}))(\log z)^{-1}$ primes. For these non-exceptional $y$ and with $X$ 
chosen to satisfy $y = \mu \log_b X$ we then have \eqref{expect}. Indeed, we can obtain the expected asymptotic formula
for $X$ on quite long ranges.
We note that we are unable to use the stronger results obtainable using a sieve method \cite{Kai}
since we require an asymptotic formula for the number of primes in an interval rather than upper or lower bounds. 

\section{Proof of Theorem~\ref{T2}}\label{prooft2}
Lemma~\ref{Katlem}, together with \eqref{DMJ}, shows that $\sigma_b(p)$ is concentrated near its mean value $\mu_b \log_b p$,
and so enables us to restrict our attention to certain $p \in \cp_b$ with negligible error.
We write
\begin{eqnarray*}
&M(m) = \exp\left((\log b) m^{\frac78}\right), \qquad &\mu(m) = \mu_b m^{\frac78},\\
 &\epsilon(m) = \displaystyle{\frac{M(m+1) - M(m)}{M(m)},} \qquad &\delta(m) = (m+1)^{\frac78} - m^{\frac78},
\end{eqnarray*}
and put $\cm(m) = [M(m),M(m+1))$.
We note for future reference that
\begin{equation}\label{eps}
\epsilon(m) =O\left(m^{-\frac18}\right) \quad \text{and} \ \  \epsilon(m) = \delta(m) \left(\log b + O\left(m^{-\frac18}\right)\right).
\end{equation}
Let $V$ be the largest integer with $M(V) <X$.  Also, write
\[
\cp(m) = \{p\in \cp_b: \sigma_b(p) = q, |q-\mu(m)| < q^{\frac23}\}.
\]
We remark that the exponents $\frac78, \frac23$ above are chosen as convenient fixed numbers between $\frac12$ and $1$ (which must satisfy 
certain inequalities to enable certain sums to converge later)
and have no other special significance.
From Lemma~\ref{Katlem} we can deduce that
\[
\sum_{m=1}^{\infty} \twolinesum{p \in \cm(m)}{p \in \cp_b, p \notin \cp(m)} \frac{1}{p}
\]
converges.  Hence
\[
S(X) := \twolinesum{p < X}{p \in \cp_b} \frac{1}{p} = \sum_{m \le V} \frac{1}{M(m)} \twolinesum{p \in \cm(m)}{p \in \cp(m)} 1  + O(1).
\]
Now write $Q = \log_b X$.  Then
\[
S(X) = \sum_{q < Q} \twolinesum{m \le V}{|q-\mu(m)|< q^{\frac23}} \frac{1}{M(m)} \twolinesum{p \in \cm(m)}{\sigma_b(p) = q} 1 + O(1).
\]
After applications of Theorem~\ref{DMR} with $\epsilon = 1/14$ and using \eqref{PNT} this becomes
\begin{equation}\label{xz}
\sum_{q < Q} \twolinesum{m \le V}{|q-\mu(m)|< q^{\frac23}} \frac{b-1}{\phi(b-1)} \frac{\epsilon(m)}{(\log b) m^{\frac78} \sqrt{2 \pi s_b m^{\frac78}}}
\left(\exp\left(-\frac{(q-\mu(m))^2}{2s_b m^{\frac78}}  \right) + O(m^{-\frac14})\right).
\end{equation}
Now the $O(m^{-\frac14})$ term in the above leads to an error (using $m= (q/\mu_b)^{\frac87} + O(q^{\frac87-\frac13})$)
\[
\ll \sum_{q < Q} q^{\frac87 - \frac13} q^{-\frac17} q^{-1} q^{-\frac12} q^{-\frac27} = O(1),
\]
and so we can concentrate on the main term in \eqref{xz}. By \eqref{eps} we can replace $\epsilon(m)$ with $\delta(m) \log b$ and only incur
an error which is $O(m^{-\frac18})$ times the main term. 
We can ignore this error as it will prove to be negligible.
By using $m^{\frac78} = q/\mu_b + O(q^{\frac23})$ we can
reduce the sum to
\[
\frac{b-1}{\phi(b-1)} \frac{1}{\sqrt{2 \pi s_b}} \sum_{q < Q} \left(\mu_b/q\right)^{\frac32} \Sigma_q + O(1),
\]
with
\[
\Sigma_q = \twolinesum{m \le V}{|q-\mu(m)|< q^{\frac23}} \delta(m) 
\exp\left(-\frac{\mu_b(q-\mu(m))^2}{2s_bq}  \right). 
\]
We can identify this as a Riemann sum of an integral and so obtain
\[
\begin{split}
\Sigma_q &= \int_{q/\mu_b - q^{\frac23}}^{q/\mu_b+q^{\frac23}} \exp\left(-\frac{\mu_b (q-\mu_b x)^2}{2s_bq}  \right) \, dx + O(1)\\
&=
\frac{\sqrt{2s_bq \pi}}{\mu_b^{\frac32}} + O(1).
\end{split}
\]
We have thus shown that
\begin{equation}\label{case1}
S(X) = \frac{b-1}{\phi(b-1)} \sum_{q < Q} \frac{1}{q} + O(1) = \frac{b-1}{\phi(b-1)} L_3(X) + O(1)
\end{equation}
as required to complete the proof.


\section{Proof of Theorem~\ref{T3}}
We prove Theorem~\ref{T3} by induction; Theorem~\ref{T2} is the case $j=1$.  Now assume that the case $j$ has been proved. If we follow the working
in \S \ref{prooft2}, then in place of \eqref{case1} we obtain
\[
\twolinesum{p < X}{p \in \cp_b^{(j+1)}} \frac{1}{p} = \frac{b-1}{\phi(b-1)}  
\twolinesum{p < Y}{p \in \cp_b^{(j)}} \frac{1}{p} + O(1)
\]
where $Y = \log_b X$.  By the inductive hypothesis we therefore have
\[
\twolinesum{p < X}{p \in \cp_b^{(j+1)}} \frac{1}{p} = \left(\frac{b-1}{\phi(b-1)}\right)^{j+1} L_{j+3}(X) + O(1)
\]
as required to complete the proof.

\section{Acknowledgement}
I would like to thank the referee for his suggestions and Christian Elsholtz whose e-mail conversation sparked my interest in this topic.

\begin{thebibliography}{9}% 
\bibitem{5}
{H. Davenport}, {\em Multiplicative Number Theory}, 2nd edition, revised by
H. L. Montgomery, Springer, 1980.

\bibitem{Drm}
{M. Drmota, C. Mauduit and J. Rivat}, Primes with an average sum of digits,
{\em Compositio Math.} \textbf{145} (2009), 271--292.

\bibitem{GHB}
{D. Goldston and D. R. Heath-Brown}, A note on the difference between consecutive primes,
{\em Math. Ann.} \textbf{266} (1984), 317--320.

\bibitem{HB}
{D. R. Heath-Brown}, The differences between consecutive primes, III, 
{\em J. London Math. Soc.} (2), \textbf{20}, 177--178.

\bibitem{Kat}
{I. K\'atai}, On the sum of digits of primes, {\em Acta Math. Acad.
Sci. Hungar.} \textbf{30} (1977), 169--173.

\bibitem{Kai}
{K. Matom\"aki}, Large differences between consecutive primes,
{\em Quart. J. Math.} \textbf{58} (2007), 489--517.

\bibitem{Mau}
{C. Mauduit and J. Rivat}, Sur un probl\`eme de Gelfond: la somme des
chiffres des nombres premiers, {\em Ann. of Math.} \textbf{171} (2010),
1591--1646.

\end{thebibliography}

\bigskip
\hrule
\bigskip

\noindent 2010 {\it Mathematics Subject Classification}:
Primary 11A63; Secondary 11N05, 11N60.

\noindent \emph{Keywords: } prime number, sum of digits, Mertens' theorem.

\bigskip
\hrule
\bigskip

\noindent (Concerned with sequences
\seqnum{A046704},
\seqnum{A070027}, and
\seqnum{A109981}.)

\bigskip
\hrule
\bigskip

\vspace*{+.1in}
\noindent
Received September 16 2011;
revised version received  December 29 2011.
Published in {\it Journal of Integer Sequences}, December 30 2011.

\bigskip
\hrule
\bigskip

\noindent
Return to
\htmladdnormallink{Journal of Integer Sequences home page}{http://www.cs.uwaterloo.ca/journals/JIS/}.
\vskip .1in


\end{document}